if (!require("dplyr")) {install.packages("dplyr"); require("dplyr")}
if (!require("Seurat")) {install.packages("Seurat"); require("Seurat")}
if (!require("patchwork")) {install.packages("patchwork"); require("patchwork")}
if (!require("knitr")) {install.packages("knitr"); require("knitr")}
if (!require("ggplot2")) {install.packages("ggplot2"); require("ggplot2")}
if (!require("BiocManager")) {install.packages("BiocManager"); require("BiocManager")}
if (!require("ggpmisc")) {install.packages("ggpmisc"); require("ggpmisc")}
if (!require("here")) {install.packages("here"); require("here")}
if (!require("devtools")) {install.packages("devtools"); require("devtools")}
if (!require("tidyverse")) {install.packages("tidyverse"); require("tidyverse")}
if (!require("openxlsx")) {install.packages("openxlsx"); require("openxlsx")}
if (!require("sjmisc")) {install.packages("sjmisc"); require("sjmisc")}
if (!require("gridExtra")) {install.packages("gridExtra"); require("gridExtra")}
if (!require("gplots")) {install.packages("gplots"); require("gplots")}
if (!require("ggvenn")) {install.packages("ggvenn"); require("ggvenn")}
if (!require("pheatmap")) {install.packages("pheatmap"); require("pheatmap")}
if (!require("ggrepel")) {install.packages("ggrepel"); require("ggrepel")}
if (!require("reshape2")) {install.packages("reshape2"); require("reshape2")}
if (!require("gplots")) {install.packages("gplots"); require("gplots")}
if (!require("viridis")) {install.packages("viridis"); require("viridis")}
if (!require("readxl")) {install.packages("readxl"); require("readxl")}
if (!require("tibble")) {install.packages("tibble"); require("tibble")}
set.seed((12345))
here()## [1] "/Users/sblanche/Desktop/FCG"
#Load the PB dataset
load(here("Dataset", "ATAC_Pseudobulk_cell_group.rds"))
load(here("Dataset", "ATAC_Pseudobulk_cell.rds"))
load(here("Dataset", "ATAC_Pseudobulk_group.rds"))
# load xlsx dataset
Protein_FXYvFXX <- read_excel("Dataset/FXYvFXX copy.xlsx")
Protein_MXXvFXX <- read_excel("Dataset/MXXvFXX copy.xlsx")
Protein_MXYvFXX <- read_excel("Dataset/MXYvFXX copy .xlsx")
Protein_MXYvFXY <- read_excel("Dataset/MXYvFXY copy .xlsx")
Protein_MXYvMXX <- read_excel("Dataset/MXYvMXX copy.xlsx")# gene list
gene_name <- unique(unlist(Protein_FXYvFXX$Gene))
#loop that makes a box plot of the expression of each gene in every cell population
for (i in gene_name) {
# Extract the specified column along with rownames
if (i %in% colnames(Pseudobulk.cell)) {
selected_column <- Pseudobulk.cell[, i, drop = FALSE]
# Save the results to a data frame with rownames
result_df <- data.frame(RowName = rownames(selected_column), Value = selected_column[, 1])
# Ensure the RowName factor levels are consistent
result_df$RowName <- factor(result_df$RowName, levels = c("PT-S1", "PT-S2-f/mKO", "PT-S2-mWT", "PT-S3-f/mKO", "PT-S3-mWT", "LOH", "TAL", "DCT","PC", "IC-A", "IC-B", "Vascular", "Interstitial", "Immune", "Pod", "PEC"))
# Create a box plot of result_df
f1 <- ggplot(result_df, aes(x = RowName, y = Value)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = i, x = "Cell Type", y = "Expression")
} else {
# Create a placeholder plot indicating the gene was not found
f1 <- ggplot() +
annotate("text", x = 1, y = 1, label = paste("Gene", i, "not found in data frame"), size = 5, color = "red") +
theme_void() +
labs(title = i)
}
# Print plots
print(f1)
}The genes Umod, Kcnj10, Scnn1b, Cldn7, Cldn8, Kcnj1, Aqp3, Aqp4, Atp6v1b1, Sgk1, Trpv4, and Slc26a4 are expressed in both the cortex and the medulla however, we are unable to calculate a ratio between cortex and medulla for these genes at this time to normalize.
#clean raw protein data
# List of data frames
protein_dfs <- list(Protein_FXYvFXX, Protein_MXXvFXX, Protein_MXYvFXX, Protein_MXYvFXY, Protein_MXYvMXX)
# Names for each cleaned dataframe
protein_names <- c("Protein_FXYvFXX_1", "Protein_MXXvFXX_1", "Protein_MXYvFXX_1", "Protein_MXYvFXY_1", "Protein_MXYvMXX_1")
# Initialize an empty list to store the cleaned data frames
cleaned_protein_dfs <- list()
# For loop to clean each data frame
for (i in seq_along(protein_dfs)) {
cleaned_protein_dfs[[protein_names[i]]] <- protein_dfs[[i]] %>%
select("Gene", "Protein", matches("MXY", ignore.case = TRUE), matches("MXX", ignore.case = TRUE), matches("FXX", ignore.case = TRUE), matches("FXY", ignore.case = TRUE), "Difference")
}
# Access cleaned dataframe
Protein_FXYvFXX_1 <- cleaned_protein_dfs$Protein_FXYvFXX_1
Protein_MXXvFXX_1 <- cleaned_protein_dfs$Protein_MXXvFXX_1
Protein_MXYvFXX_1 <- cleaned_protein_dfs$Protein_MXYvFXX_1
Protein_MXYvFXY_1 <- cleaned_protein_dfs$Protein_MXYvFXY_1
Protein_MXYvMXX_1 <- cleaned_protein_dfs$Protein_MXYvMXX_1
head(Protein_FXYvFXX_1)This is the cleaned version of the raw data.
# Subset for 'FL' and 'CL' suffixed proteins
suffix_data_FXYvFXX <- Protein_FXYvFXX_1 %>%
filter(grepl("-FL$|-Cl$", Protein)) %>%
mutate(
ProteinBase = gsub("-?FL$|-?Cl$", "", Protein), # Create base protein name
Suffix = ifelse(grepl("FL$|-FL$", Protein), "FL", "Cl") # Identify whether the protein has FL or Cl suffix
)
# Separate 'FL' and 'CL' data
fl_data_FXYvFXX <- suffix_data_FXYvFXX %>% filter(Suffix == "FL")
cl_data_FXYvFXX <- suffix_data_FXYvFXX %>% filter(Suffix == "Cl")
# Join 'FL' and 'Cl' datasets by ProteinBase and Gene
combined_suffix_FXYvFXX <- full_join(fl_data_FXYvFXX, cl_data_FXYvFXX, by = c("ProteinBase", "Gene"), suffix = c("_FL", "_Cl"))
# Calculate unweighted average between FL and Cl values
Protein_FXYvFXX_uw_avg <- combined_suffix_FXYvFXX %>%
rowwise() %>%
mutate(
`Mean of FXX` = mean(c(`Mean of FXX_FL`, `Mean of FXX_Cl`), na.rm = TRUE),
`Mean of FXY` = mean(c(`Mean of FXY_FL`, `Mean of FXY_Cl`), na.rm = TRUE),
Difference = `Mean of FXY` - `Mean of FXX`
) %>%
ungroup() %>%
select(ProteinBase, Gene, `Mean of FXY`, `Mean of FXX`, Difference)%>%
rename(Protein = ProteinBase)
# Replace original rows with calculated means
# Perform the join
Protein_FXYvFXX_2 <- Protein_FXYvFXX_1 %>%
left_join(Protein_FXYvFXX_uw_avg, by = c("Gene")) %>%
rowwise() %>%
mutate(
`Mean of FXX` = coalesce(`Mean of FXX.y`, `Mean of FXX.x`),
`Mean of FXY` = coalesce(`Mean of FXY.y`, `Mean of FXY.x`),
Difference = coalesce(Difference.y, Difference.x),
Protein = coalesce(Protein.y, Protein.x),
) %>%
ungroup() %>%
select(Protein, Gene, `Mean of FXY`, `Mean of FXX`, Difference) %>%
distinct(Gene, Protein, .keep_all = TRUE) %>%
mutate_if(is.numeric, round, 3)
# merge back to og df
head(Protein_FXYvFXX_2)# Subset for 'FL' and 'CL' suffixed proteins
suffix_data_MXXvFXX <- Protein_MXXvFXX_1 %>%
filter(grepl("-FL$|-Cl$", Protein)) %>%
mutate(
ProteinBase = gsub("-?FL$|-?Cl$", "", Protein), # Create base protein name
Suffix = ifelse(grepl("FL$|-FL$", Protein), "FL", "Cl") # Identify whether the protein has FL or Cl suffix
)
# Separate 'FL' and 'CL' data
fl_data_MXXvFXX <- suffix_data_MXXvFXX %>% filter(Suffix == "FL")
cl_data_MXXvFXX <- suffix_data_MXXvFXX %>% filter(Suffix == "Cl")
# Join 'FL' and 'Cl' datasets by ProteinBase and Gene
combined_suffix_MXXvFXX <- full_join(fl_data_MXXvFXX, cl_data_MXXvFXX, by = c("ProteinBase", "Gene"), suffix = c("_FL", "_Cl"))
# Calculate unweighted average between FL and Cl values
Protein_MXXvFXX_uw_avg <- combined_suffix_MXXvFXX %>%
rowwise() %>%
mutate(
`Mean of MXX` = mean(c(`Mean of MXX_FL`, `Mean of MXX_Cl`), na.rm = TRUE),
`Mean of FXX` = mean(c(`Mean of FXX_FL`, `Mean of FXX_Cl`), na.rm = TRUE),
Difference = `Mean of MXX` - `Mean of FXX`
) %>%
ungroup() %>%
select(ProteinBase, Gene, `Mean of MXX`, `Mean of FXX`, Difference)%>%
rename(Protein = ProteinBase)
# Replace original rows with calculated means
# Perform the join
Protein_MXXvFXX_2 <- Protein_MXXvFXX_1 %>%
left_join(Protein_MXXvFXX_uw_avg, by = c("Gene")) %>%
rowwise() %>%
mutate(
`Mean of MXX` = coalesce(`Mean of MXX.y`, `Mean of MXX.x`),
`Mean of FXX` = coalesce(`Mean of FXX.y`, `Mean of FXX.x`),
Difference = coalesce(Difference.y, Difference.x),
Protein = coalesce(Protein.y, Protein.x),
) %>%
ungroup() %>%
select(Protein, Gene, `Mean of MXX`, `Mean of FXX`, Difference) %>%
mutate(Protein = sub(" - .*", "", Protein)) %>%
distinct(Gene, Protein, .keep_all = TRUE) %>%
mutate_if(is.numeric, round, 3)
# merge back to og df
head(Protein_MXXvFXX_2)# Subset for 'FL' and 'CL' suffixed proteins
suffix_data_MXYvFXX <- Protein_MXYvFXX_1 %>%
filter(grepl("-FL$|-Cl$", Protein)) %>%
mutate(
ProteinBase = gsub("-?FL$|-?Cl$", "", Protein), # Create base protein name
Suffix = ifelse(grepl("FL$|-FL$", Protein), "FL", "Cl") # Identify whether the protein has FL or Cl suffix
)
# Separate 'FL' and 'CL' data
fl_data_MXYvFXX <- suffix_data_MXYvFXX %>% filter(Suffix == "FL")
cl_data_MXYvFXX <- suffix_data_MXYvFXX %>% filter(Suffix == "Cl")
# Join 'FL' and 'Cl' datasets by ProteinBase and Gene
combined_suffix_MXYvFXX <- full_join(fl_data_MXYvFXX, cl_data_MXYvFXX, by = c("ProteinBase", "Gene"), suffix = c("_FL", "_Cl"))
# Calculate unweighted average between FL and Cl values
Protein_MXYvFXX_uw_avg <- combined_suffix_MXYvFXX %>%
rowwise() %>%
mutate(
`Mean of MXY` = mean(c(`Mean of MXY_FL`, `Mean of MXY_Cl`), na.rm = TRUE),
`Mean of FXX` = mean(c(`Mean of FXX_FL`, `Mean of FXX_Cl`), na.rm = TRUE),
Difference = `Mean of MXY` - `Mean of FXX`
) %>%
ungroup() %>%
select(ProteinBase, Gene, `Mean of MXY`, `Mean of FXX`, Difference)%>%
rename(Protein = ProteinBase)
# Replace original rows with calculated means
# Perform the join
Protein_MXYvFXX_2 <- Protein_MXYvFXX_1 %>%
left_join(Protein_MXYvFXX_uw_avg, by = c("Gene")) %>%
rowwise() %>%
mutate(
`Mean of MXY` = coalesce(`Mean of MXY.y`, `Mean of MXY.x`),
`Mean of FXX` = coalesce(`Mean of FXX.y`, `Mean of FXX.x`),
Difference = coalesce(Difference.y, Difference.x),
Protein = coalesce(Protein.y, Protein.x),
) %>%
ungroup() %>%
select(Protein, Gene, `Mean of MXY`, `Mean of FXX`, Difference) %>%
mutate(Protein = sub(" - .*", "", Protein)) %>%
distinct(Gene, Protein, .keep_all = TRUE) %>%
mutate_if(is.numeric, round, 3)
# merge back to og df
head(Protein_MXYvFXX_2)# Subset for 'FL' and 'CL' suffixed proteins
suffix_data_MXYvFXY <- Protein_MXYvFXY_1 %>%
filter(grepl("-FL$|-Cl$", Protein)) %>%
mutate(
ProteinBase = gsub("-?FL$|-?Cl$", "", Protein), # Create base protein name
Suffix = ifelse(grepl("FL$|-FL$", Protein), "FL", "Cl") # Identify whether the protein has FL or Cl suffix
)
# Separate 'FL' and 'CL' data
fl_data_MXYvFXY <- suffix_data_MXYvFXY %>% filter(Suffix == "FL")
cl_data_MXYvFXY <- suffix_data_MXYvFXY %>% filter(Suffix == "Cl")
# Join 'FL' and 'Cl' datasets by ProteinBase and Gene
combined_suffix_MXYvFXY <- full_join(fl_data_MXYvFXY, cl_data_MXYvFXY, by = c("ProteinBase", "Gene"), suffix = c("_FL", "_Cl"))
# Calculate unweighted average between FL and Cl values
Protein_MXYvFXY_uw_avg <- combined_suffix_MXYvFXY %>%
rowwise() %>%
mutate(
`Mean of MXY` = mean(c(`Mean of MXY_FL`, `Mean of MXY_Cl`), na.rm = TRUE),
`Mean of FXY` = mean(c(`Mean of FXY_FL`, `Mean of FXY_Cl`), na.rm = TRUE),
Difference = `Mean of MXY` - `Mean of FXY`
) %>%
ungroup() %>%
select(ProteinBase, Gene, `Mean of MXY`, `Mean of FXY`, Difference)%>%
rename(Protein = ProteinBase)
# Replace original rows with calculated means
# Perform the join
Protein_MXYvFXY_2 <- Protein_MXYvFXY_1 %>%
left_join(Protein_MXYvFXY_uw_avg, by = c("Gene")) %>%
rowwise() %>%
mutate(
`Mean of MXY` = coalesce(`Mean of MXY.y`, `Mean of MXY.x`),
`Mean of FXY` = coalesce(`Mean of FXY.y`, `Mean of FXY.x`),
Difference = coalesce(Difference.y, Difference.x),
Protein = coalesce(Protein.y, Protein.x),
) %>%
ungroup() %>%
select(Protein, Gene, `Mean of MXY`, `Mean of FXY`, Difference) %>%
mutate(Protein = sub(" - .*", "", Protein)) %>%
distinct(Gene, Protein, .keep_all = TRUE) %>%
mutate_if(is.numeric, round, 3)
# merge back to og df
head(Protein_MXYvFXY_2)# Subset for 'FL' and 'CL' suffixed proteins
suffix_data_MXYvMXX <- Protein_MXYvMXX_1 %>%
filter(grepl("-FL$|-Cl$", Protein)) %>%
mutate(
ProteinBase = gsub("-?FL$|-?Cl$", "", Protein), # Create base protein name
Suffix = ifelse(grepl("FL$|-FL$", Protein), "FL", "Cl") # Identify whether the protein has FL or Cl suffix
)
# Separate 'FL' and 'CL' data
fl_data_MXYvMXX <- suffix_data_MXYvMXX %>% filter(Suffix == "FL")
cl_data_MXYvMXX <- suffix_data_MXYvMXX %>% filter(Suffix == "Cl")
# Join 'FL' and 'Cl' datasets by ProteinBase and Gene
combined_suffix_MXYvMXX <- full_join(fl_data_MXYvMXX, cl_data_MXYvMXX, by = c("ProteinBase", "Gene"), suffix = c("_FL", "_Cl"))
# Calculate unweighted average between FL and Cl values
Protein_MXYvMXX_uw_avg <- combined_suffix_MXYvMXX %>%
rowwise() %>%
mutate(
`Mean of MXY` = mean(c(`Mean of MXY_FL`, `Mean of MXY_Cl`), na.rm = TRUE),
`Mean of MXX` = mean(c(`Mean of MXX_FL`, `Mean of MXX_Cl`), na.rm = TRUE),
Difference = `Mean of MXY` - `Mean of MXX`
) %>%
ungroup() %>%
select(ProteinBase, Gene, `Mean of MXY`, `Mean of MXX`, Difference)%>%
rename(Protein = ProteinBase)
# Replace original rows with calculated means
# Perform the join
Protein_MXYvMXX_2 <- Protein_MXYvMXX_1 %>%
left_join(Protein_MXYvMXX_uw_avg, by = c("Gene")) %>%
rowwise() %>%
mutate(
`Mean of MXY` = coalesce(`Mean of MXY.y`, `Mean of MXY.x`),
`Mean of MXX` = coalesce(`Mean of MXX.y`, `Mean of MXX.x`),
Difference = coalesce(Difference.y, Difference.x),
Protein = coalesce(Protein.y, Protein.x),
) %>%
ungroup() %>%
select(Protein, Gene, `Mean of MXY`, `Mean of MXX`, Difference) %>%
mutate(Protein = sub(" - .*", "", Protein)) %>%
distinct(Gene, Protein, .keep_all = TRUE) %>%
mutate_if(is.numeric, round, 3)
# merge back to og df
head(Protein_MXYvMXX_2)In this is step the data is processed further by calculating the mean for proteins that have multiple measurements. These proteins are ENaC alpha and ENaC gamma.
# Subset the data to get proteins with prefix 'm' and 'c'
labeled_data_FXYvFXX <- Protein_FXYvFXX_2 %>%
filter(grepl("^m|^c", Protein)) %>%
mutate(
ProteinBase = gsub("^m|^c", "", Protein), # Create a base name without 'm' or 'c' for merging later
Prefix = ifelse(grepl("^m", Protein), "m", "c") # Identify if it's 'm' or 'c'
)
# Separate 'm' and 'c' data for easy access
m_data_FXYvFXX <- labeled_data_FXYvFXX %>% filter(Prefix == "m")
c_data_FXYvFXX <- labeled_data_FXYvFXX %>% filter(Prefix == "c")
# join the 'm' and 'c' data on their ProteinBase (common protein names)
combined_data_FXYvFXX <- full_join(m_data_FXYvFXX, c_data_FXYvFXX, by = c("ProteinBase", "Gene"), suffix = c("_m", "_c"))
# Apply the weighted average formula (4c + m) / 5 & create difference column
Protein_FXYvFXX_w_avg <- combined_data_FXYvFXX %>%
rowwise() %>%
mutate(
`Mean of FXX` = ifelse(is.na(`Mean of FXX_c`) | is.na(`Mean of FXX_m`),
coalesce(`Mean of FXX_c`, `Mean of FXX_m`), # Use the available value if one is missing
(4 * `Mean of FXX_c` + `Mean of FXX_m`) / 5), # Otherwise, apply the weighted average
`Mean of FXY` = ifelse(is.na(`Mean of FXY_c`) | is.na(`Mean of FXY_m`),
coalesce(`Mean of FXY_c`, `Mean of FXY_m`),
(4 * `Mean of FXY_c` + `Mean of FXY_m`) / 5),
Difference = `Mean of FXX` - `Mean of FXY` # Recalculate the 'Difference' column
) %>%
ungroup() %>%
select(ProteinBase, Gene, `Mean of FXX`, `Mean of FXY`, Difference)
# Rename ProteinBase back to Protein in the updated subset for merging
Protein_FXYvFXX_3 <- Protein_FXYvFXX_w_avg %>%
rename(Protein = ProteinBase) %>%
mutate_if(is.numeric, round, 3)
head(Protein_FXYvFXX_3)# Subset the data to get proteins with prefix 'm' and 'c'
labeled_data_MXXvFXX <- Protein_MXXvFXX_2 %>%
filter(grepl("^m|^c", Protein)) %>%
mutate(
ProteinBase = gsub("^m|^c", "", Protein), # Create a base name without 'm' or 'c' for merging later
Prefix = ifelse(grepl("^m", Protein), "m", "c") # Identify if it's 'm' or 'c'
)
# Separate 'm' and 'c' data for easy access
m_data_MXXvFXX <- labeled_data_MXXvFXX %>% filter(Prefix == "m")
c_data_MXXvFXX <- labeled_data_MXXvFXX %>% filter(Prefix == "c")
# join the 'm' and 'c' data on their ProteinBase (common protein names)
combined_data_MXXvFXX <- full_join(m_data_MXXvFXX, c_data_MXXvFXX, by = c("ProteinBase", "Gene"), suffix = c("_m", "_c"))
# Apply the weighted average formula (4c + m) / 5 & create difference column
Protein_MXXvFXX_w_avg <- combined_data_MXXvFXX %>%
rowwise() %>%
mutate(
`Mean of MXX` = ifelse(is.na(`Mean of MXX_c`) | is.na(`Mean of MXX_m`),
coalesce(`Mean of MXX_c`, `Mean of MXX_m`), # Use the available value if one is missing
(4 * `Mean of MXX_c` + `Mean of MXX_m`) / 5), # Otherwise, apply the weighted average
`Mean of FXX` = ifelse(is.na(`Mean of FXX_c`) | is.na(`Mean of FXX_m`),
coalesce(`Mean of FXX_c`, `Mean of FXX_m`),
(4 * `Mean of FXX_c` + `Mean of FXX_m`) / 5),
Difference = `Mean of MXX` - `Mean of FXX` # Recalculate the 'Difference' column
) %>%
ungroup() %>%
select(ProteinBase, Gene, `Mean of MXX`, `Mean of FXX`, Difference)
# Rename ProteinBase back to Protein in the updated subset for merging
Protein_MXXvFXX_3 <- Protein_MXXvFXX_w_avg %>%
rename(Protein = ProteinBase) %>%
mutate_if(is.numeric, round, 3)
head(Protein_MXXvFXX_3)# Subset the data to get proteins with prefix 'm' and 'c'
labeled_data_MXYvFXX <- Protein_MXYvFXX_2 %>%
filter(grepl("^m|^c", Protein)) %>%
mutate(
ProteinBase = gsub("^m|^c", "", Protein), # Create a base name without 'm' or 'c' for merging later
Prefix = ifelse(grepl("^m", Protein), "m", "c") # Identify if it's 'm' or 'c'
)
# Separate 'm' and 'c' data for easy access
m_data_MXYvFXX <- labeled_data_MXYvFXX %>% filter(Prefix == "m")
c_data_MXYvFXX <- labeled_data_MXYvFXX %>% filter(Prefix == "c")
# join the 'm' and 'c' data on their ProteinBase (common protein names)
combined_data_MXYvFXX <- full_join(m_data_MXYvFXX, c_data_MXYvFXX, by = c("ProteinBase", "Gene"), suffix = c("_m", "_c"))
# Apply the weighted average formula (4c + m) / 5 & create difference column
Protein_MXYvFXX_w_avg <- combined_data_MXYvFXX %>%
rowwise() %>%
mutate(
`Mean of MXY` = ifelse(is.na(`Mean of MXY_c`) | is.na(`Mean of MXY_m`),
coalesce(`Mean of MXY_c`, `Mean of MXY_m`), # Use the available value if one is missing
(4 * `Mean of MXY_c` + `Mean of MXY_m`) / 5), # Otherwise, apply the weighted average
`Mean of FXX` = ifelse(is.na(`Mean of FXX_c`) | is.na(`Mean of FXX_m`),
coalesce(`Mean of FXX_c`, `Mean of FXX_m`),
(4 * `Mean of FXX_c` + `Mean of FXX_m`) / 5),
Difference = `Mean of MXY` - `Mean of FXX` # Recalculate the 'Difference' column
) %>%
ungroup() %>%
select(ProteinBase, Gene, `Mean of MXY`, `Mean of FXX`, Difference)
# Rename ProteinBase back to Protein in the updated subset for merging
Protein_MXYvFXX_3 <- Protein_MXYvFXX_w_avg %>%
rename(Protein = ProteinBase) %>%
mutate_if(is.numeric, round, 3)
head(Protein_MXYvFXX_3)# Subset the data to get proteins with prefix 'm' and 'c'
labeled_data_MXYvFXY <- Protein_MXYvFXY_2 %>%
filter(grepl("^m|^c", Protein)) %>%
mutate(
ProteinBase = gsub("^m|^c", "", Protein), # Create a base name without 'm' or 'c' for merging later
Prefix = ifelse(grepl("^m", Protein), "m", "c") # Identify if it's 'm' or 'c'
)
# Separate 'm' and 'c' data for easy access
m_data_MXYvFXY <- labeled_data_MXYvFXY %>% filter(Prefix == "m")
c_data_MXYvFXY <- labeled_data_MXYvFXY %>% filter(Prefix == "c")
# join the 'm' and 'c' data on their ProteinBase (common protein names)
combined_data_MXYvFXY <- full_join(m_data_MXYvFXY, c_data_MXYvFXY, by = c("ProteinBase", "Gene"), suffix = c("_m", "_c"))
# Apply the weighted average formula (4c + m) / 5 & create difference column
Protein_MXYvFXY_w_avg <- combined_data_MXYvFXY %>%
rowwise() %>%
mutate(
`Mean of MXY` = ifelse(is.na(`Mean of MXY_c`) | is.na(`Mean of MXY_m`),
coalesce(`Mean of MXY_c`, `Mean of MXY_m`), # Use the available value if one is missing
(4 * `Mean of MXY_c` + `Mean of MXY_m`) / 5), # Otherwise, apply the weighted average
`Mean of FXY` = ifelse(is.na(`Mean of FXY_c`) | is.na(`Mean of FXY_m`),
coalesce(`Mean of FXY_c`, `Mean of FXY_m`),
(4 * `Mean of FXY_c` + `Mean of FXY_m`) / 5),
Difference = `Mean of MXY` - `Mean of FXY` # Recalculate the 'Difference' column
) %>%
ungroup() %>%
select(ProteinBase, Gene, `Mean of MXY`, `Mean of FXY`, Difference)
# Rename ProteinBase back to Protein in the updated subset for merging
Protein_MXYvFXY_3 <- Protein_MXYvFXY_w_avg %>%
rename(Protein = ProteinBase) %>%
mutate_if(is.numeric, round, 3)
head(Protein_MXYvFXY_3)# Subset the data to get proteins with prefix 'm' and 'c'
labeled_data_MXYvMXX <- Protein_MXYvMXX_2 %>%
filter(grepl("^m|^c", Protein)) %>%
mutate(
ProteinBase = gsub("^m|^c", "", Protein), # Create a base name without 'm' or 'c' for merging later
Prefix = ifelse(grepl("^m", Protein), "m", "c") # Identify if it's 'm' or 'c'
)
# Separate 'm' and 'c' data for easy access
m_data_MXYvMXX <- labeled_data_MXYvMXX %>% filter(Prefix == "m")
c_data_MXYvMXX <- labeled_data_MXYvMXX %>% filter(Prefix == "c")
# join the 'm' and 'c' data on their ProteinBase (common protein names)
combined_data_MXYvMXX <- full_join(m_data_MXYvMXX, c_data_MXYvMXX, by = c("ProteinBase", "Gene"), suffix = c("_m", "_c"))
# Apply the weighted average formula (4c + m) / 5 & create difference column
Protein_MXYvMXX_w_avg <- combined_data_MXYvMXX %>%
rowwise() %>%
mutate(
`Mean of MXX` = ifelse(is.na(`Mean of MXX_c`) | is.na(`Mean of MXX_m`),
coalesce(`Mean of MXX_c`, `Mean of MXX_m`), # Use the available value if one is missing
(4 * `Mean of MXX_c` + `Mean of MXX_m`) / 5), # Otherwise, apply the weighted average
`Mean of MXY` = ifelse(is.na(`Mean of MXY_c`) | is.na(`Mean of MXY_m`),
coalesce(`Mean of MXY_c`, `Mean of MXY_m`),
(4 * `Mean of MXY_c` + `Mean of MXY_m`) / 5),
Difference = `Mean of MXX` - `Mean of MXY` # Recalculate the 'Difference' column
) %>%
ungroup() %>%
select(ProteinBase, Gene, `Mean of MXY`, `Mean of MXX`, Difference)
# Rename ProteinBase back to Protein in the updated subset for merging
Protein_MXYvMXX_3 <- Protein_MXYvMXX_w_avg %>%
rename(Protein = ProteinBase) %>%
mutate_if(is.numeric, round, 3)
head(Protein_MXYvMXX_3)The data frames above represent the data after one step of processing. In this step a weighted average of the medullary and cortex proteins was calculated. To compute the weighted average, we used the formula 4c+m/5 to reflect the assumed 4 to 1 ratio of cortex and medullary proteins in the sample. The proteins for which we used this formula for were AQP1, NHE3, NKCC2, NKAα1, NKAβ1, BKβ4, MUC1, ENaCα, ENaCγ, SGLT1, SPAK, WNK4, and AQP2.
# Normalize the "FXX" column to 1 and adjust other columns accordingly
Protein_FXYvFXX_4 <- Protein_FXYvFXX_3 %>%
group_by(Gene) %>%
mutate(
`Mean of FXY` = `Mean of FXY` / `Mean of FXX`[1],
Difference = Difference / `Mean of FXX`[1],
`Mean of FXX` = `Mean of FXX` / `Mean of FXX`[1],
) %>%
mutate_if(is.numeric, round, 3) %>%
select(-Protein)
head(Protein_FXYvFXX_4)# Normalize the "FXX" column to 1 and adjust other columns accordingly
Protein_MXXvFXX_4 <- Protein_MXXvFXX_3 %>%
group_by(Gene) %>%
mutate(
`Mean of MXX` = `Mean of MXX` / `Mean of FXX`[1],
Difference = Difference / `Mean of FXX`[1],
`Mean of FXX` = `Mean of FXX` / `Mean of FXX`[1],
) %>%
mutate_if(is.numeric, round, 3) %>%
select(-Protein)
head(Protein_MXXvFXX_4)# Normalize the "FXX" column to 1 and adjust other columns accordingly
Protein_MXYvFXX_4 <- Protein_MXYvFXX_3 %>%
group_by(Gene) %>%
mutate(
`Mean of FXX` = `Mean of FXX` / `Mean of MXY`[1],
Difference = Difference / `Mean of MXY`[1],
`Mean of MXY` = `Mean of MXY` / `Mean of MXY`[1],
) %>%
mutate_if(is.numeric, round, 3) %>%
select(-Protein)
head(Protein_MXYvFXX_4)# Normalize the "FXX" column to 1 and adjust other columns accordingly
Protein_MXYvFXY_4 <- Protein_MXYvFXY_3 %>%
group_by(Gene) %>%
mutate(
`Mean of FXY` = `Mean of FXY` / `Mean of MXY`[1],
Difference = Difference / `Mean of FXY`[1],
`Mean of MXY` = `Mean of MXY` / `Mean of MXY`[1],
) %>%
mutate_if(is.numeric, round, 3) %>%
select(-Protein)
head(Protein_MXYvFXY_4)# Normalize the "FXX" column to 1 and adjust other columns accordingly
Protein_MXYvMXX_4 <- Protein_MXYvMXX_3 %>%
group_by(Gene) %>%
mutate(
`Mean of MXY` = `Mean of MXY` / `Mean of MXX`[1],
Difference = Difference / `Mean of MXX`[1],
`Mean of MXX` = `Mean of MXX` / `Mean of MXX`[1],
) %>%
mutate_if(is.numeric, round, 3) %>%
select(-Protein)
head(Protein_MXYvMXX_4)In this step I normalized the data frame so the second sex characteristic is normalized to 1, and other columns are proportionally adjusted based on this normalization. For example in the comparison FXYvFXX, I normalize the Mean of FXX to 1. To do this I grouped by the Gene column to ensure calculations are performed separately for each gene. The I adjusted each by dividing its values by the second sex characteristic value within the group.
#rotate df and create gene column
RNA_cell_1 <- Pseudobulk.cell %>%
rotate_df() %>%
mutate(Gene = rownames(.))
RNA_cell_group_1 <- Pseudobulk.cell.group %>%
rotate_df() %>%
mutate(Gene = rownames(.))
RNA_group_1 <- Pseudobulk.group %>%
rotate_df() %>%
mutate(Gene = rownames(.))
#gene_name <- unique(unlist(Protein_FXYvFXX$Gene))
# Filter dataframe to keep only the selected genes
RNA_group_1 <- RNA_group_1 %>%
filter(Gene %in% gene_name)
head(RNA_group_1)In this step I rotated the data frame so the columns became rows and vice versa. I did this to make correlating the RNA and Protein data easier.
# Renaming the columns
RNA_group_2 <- RNA_group_1 %>%
dplyr::rename(
`Mean of MXX` = `F-KO`,
`Mean of FXX` = `F-WT`,
`Mean of FXY` = `M-KO`,
`Mean of MXY` = `M-WT`) %>%
mutate_if(is.numeric, round, 3)
head(RNA_group_2)In this step I renamed the columns so they matched the columns from the RNA data.
# Normalize the "WT" column to 1 and adjust other columns accordingly
RNA_group_3 <- RNA_group_2 %>%
group_by(Gene) %>%
mutate(
`Mean of MXX` = `Mean of MXX` / `Mean of MXY`[1],
`Mean of FXX` = `Mean of FXX` / `Mean of MXY`[1],
`Mean of FXY` = `Mean of FXY` / `Mean of MXY`[1],
`Mean of MXY` = `Mean of MXY` / `Mean of MXY`[1],) %>%
mutate_if(is.numeric, round, 3)
head(RNA_group_3)In this step I normalized the data frame so MXY is normalized to 1, and other columns are proportionally adjusted based on this normalization. To do this I grouped by the Gene column to ensure calculations are performed separately for each gene. The I adjusted each by dividing its values by the first WT value within the group.
# convert data frames into tidyform
Protein_MXYvFXX_5 <- Protein_MXYvFXX_4 %>%
dplyr::rename(
MXY = `Mean of MXY`,
FXX = `Mean of FXX`)%>%
pivot_longer(cols = MXY:FXX, names_to = "Group", values_to = "Value")
RNA_group_4 <- RNA_group_3 %>%
dplyr::rename(
MXX = `Mean of MXX`,
FXX = `Mean of FXX`,
FXY = `Mean of FXY`,
MXY = `Mean of MXY`)%>%
pivot_longer(cols = MXX:MXY, names_to = "Group", values_to = "Value")
# create Type column
RNA_group_4$Type <- "RNA"
Protein_MXYvFXX_5$Type <- "Protein"
# Merge RNA and Protein df
combined_df_MXYvFXX <- rbind(Protein_MXYvFXX_5, RNA_group_4)
# Reorder the X axis to be (MXY, FXY, FXX, MXX)
combined_df_MXYvFXX$Group <- factor(combined_df_MXYvFXX$Group, levels = c("MXY", "FXY", "FXX", "MXX"))
# Plot the data for the specific gene Aqp1
#no line
ggplot(combined_df_MXYvFXX %>%
filter(Group %in% c("MXY", "FXX"), Gene %in% c("Aqp1")),
aes(x = Group, y = Value, color = Type)) +
geom_point(size = 4) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
coord_cartesian(ylim = c(0.5, 3)) +
labs(title = paste("Scatter Plot of Value by Group and Type for", "Aqp1"),
x = "Group", y = "Value")#same graph, solid line
ggplot(combined_df_MXYvFXX %>%
filter(Group %in% c("MXY", "FXX"), Gene %in% c("Aqp1")),
aes(x = Group, y = Value, color = Type)) +
geom_point(size = 4) +
geom_line(aes(group = interaction(Gene, Group)), linetype = "solid", color = "purple",size = 1.2) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
coord_cartesian(ylim = c(0.5, 3)) +
labs(title = paste("Scatter Plot of Value by Group and Type for", "Aqp1"),
x = "Group", y = "Value")# Create a scatter plot the matches the values of the Gene and Group column but splits them so that the x axis is from protein and the Y axis is from RNA
# fixed axis, no line
ggplot(combined_df_MXYvFXX %>% filter(Group %in% c("MXY", "FXX")),
aes(x = Group, y = Value, color = Type)) +
geom_point() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
facet_wrap(~Gene, scales = "free_y") +
coord_cartesian(ylim = c(0.5, 5)) +
labs(title = "Scatter Plot of Value by Group and Type", x = "Group", y = "Value")## Fixed axis, solid line
ggplot(combined_df_MXYvFXX %>% filter(Group %in% c("MXY", "FXX")),
aes(x = Group, y = Value, color = Type)) +
geom_point() +
geom_line(aes(group = interaction(Gene, Group)), linetype = "solid", color = "purple") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
facet_wrap(~Gene, scales = "free_y") +
coord_cartesian(ylim = c(0.5, 5)) +
labs(title = "Scatter Plot of Value by Group and Type", x = "Group", y = "Value")# convert data frames into tidyform
Protein_MXYvFXY_5 <- Protein_MXYvFXY_4 %>%
dplyr::rename(
MXY = `Mean of MXY`,
FXY = `Mean of FXY`)%>%
pivot_longer(cols = MXY:FXY, names_to = "Group", values_to = "Value")
# create Type column
Protein_MXYvFXY_5$Type <- "Protein"
# Merge RNA and Protein df
combined_df_MXYvFXY <- rbind(Protein_MXYvFXY_5, RNA_group_4)
# Reorder the X axis to be (MXY, FXY, FXX, MXX)
combined_df_MXYvFXY$Group <- factor(combined_df_MXYvFXY$Group, levels = c("MXY", "FXY", "FXX", "MXX"))
# Create a scatter plot the matches the values of the Gene and Group column but splits them so that the x axis is from protein and the Y axis is from RNA
# fixed axis, no line
ggplot(combined_df_MXYvFXY %>% filter(Group %in% c("MXY", "FXY")),
aes(x = Group, y = Value, color = Type)) +
geom_point() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
facet_wrap(~Gene, scales = "free_y") +
coord_cartesian(ylim = c(0.5, 2.5)) +
labs(title = "Scatter Plot of Value by Group and Type", x = "Group", y = "Value")# Create a scatter plot the matches the values of the Gene and Group column but splits them so that the x axis is from protein and the Y axis is from RNA
## Fixed axis, solid line
ggplot(combined_df_MXYvFXY %>% filter(Group %in% c("MXY", "FXY")),
aes(x = Group, y = Value, color = Type)) +
geom_point() +
geom_line(aes(group = interaction(Gene, Group)), linetype = "solid", color = "purple") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
facet_wrap(~Gene, scales = "free_y") +
coord_cartesian(ylim = c(0.5, 2.5)) +
labs(title = "Scatter Plot of Value by Group and Type", x = "Group", y = "Value")# subset MXX and FXX
RNA_group_MXX.FXX <-subset(RNA_group_4, Group %in% c("MXX", "FXX"))
# create separate columns for "WT" and "KO" values, normalize FXX to 1
pivoted_RNA_group_MXX.FXX <- RNA_group_MXX.FXX %>%
pivot_wider(names_from = Group, values_from = Value) %>%
group_by(Gene, Type) %>%
mutate(
MXX = MXX / FXX[1],
FXX = FXX / FXX[1],) %>%
ungroup()
# recombine columns
RNA_group_MXX.FXX.2 <- pivoted_RNA_group_MXX.FXX %>%
pivot_longer(cols = c(MXX, FXX), names_to = "Group", values_to = "Value")
# convert data frames into tidyform
Protein_MXXvFXX_5 <- Protein_MXXvFXX_4 %>%
dplyr::rename(
MXX = `Mean of MXX`,
FXX = `Mean of FXX`)%>%
pivot_longer(cols = MXX:FXX, names_to = "Group", values_to = "Value")
# create Type column
Protein_MXXvFXX_5$Type <- "Protein"
# Merge RNA and Protein df
combined_df_MXXvFXX <- rbind(Protein_MXXvFXX_5, RNA_group_MXX.FXX.2)
# Reorder the X axis to be (MXY, FXY, FXX, MXX)
combined_df_MXXvFXX$Group <- factor(combined_df_MXXvFXX$Group, levels = c("MXY", "FXY", "FXX", "MXX"))
# Create a scatter plot the matches the values of the Gene and Group column but splits them so that the x axis is from protein and the Y axis is from RNA
# fixed axis, no line
ggplot(combined_df_MXXvFXX,
aes(x = Group, y = Value, color = Type)) +
geom_point() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
facet_wrap(~Gene, scales = "free_y") +
coord_cartesian(ylim = c(0.5, 2)) +
labs(title = "Scatter Plot of Value by Group and Type", x = "Group", y = "Value")# Create a scatter plot the matches the values of the Gene and Group column but splits them so that the x axis is from protein and the Y axis is from RNA
## Fixed axis, solid line
ggplot(combined_df_MXXvFXX,
aes(x = Group, y = Value, color = Type)) +
geom_point() +
geom_line(aes(group = interaction(Gene, Group)), linetype = "solid", color = "purple") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
facet_wrap(~Gene, scales = "free_y") +
coord_cartesian(ylim = c(0.5, 2)) +
labs(title = "Scatter Plot of Value by Group and Type", x = "Group", y = "Value")# Create a new dataframe that creates new columns to have the values for RNA and Protein in the same row
combined_df_MXYvFXX_2 <- combined_df_MXYvFXX %>%
filter(Group %in% c("MXY", "FXX")) %>%
select(-Difference) %>%
pivot_wider(names_from = Type, values_from = Value)
# Create a new column if RNA is greater than 1
combined_df_MXYvFXX_2$RNA_Greater_1 <- ifelse(combined_df_MXYvFXX_2$RNA > 1, "Yes", "No")
combined_df_MXYvFXX_2$Protein_Greater_1 <- ifelse(combined_df_MXYvFXX_2$Protein > 1, "Yes", "No")
#create a column if RNA_Greater_1 and Protein_Greater_1 are the same value
combined_df_MXYvFXX_2$Same <- ifelse(combined_df_MXYvFXX_2$RNA_Greater_1 == combined_df_MXYvFXX_2$Protein_Greater_1, "Yes", "No")
# Count how many values are yes or no in the column Same
combined_df_MXYvFXX_2 %>%
group_by(Same) %>%
summarise(Count = n())# Filter the rows where Same is "No" and select the Gene column
gene_list_no_MXYvFXX <- combined_df_MXYvFXX_2 %>%
filter(Same == "No") %>%
pull(Gene)
print(gene_list_no_MXYvFXX)## [1] "Slc9a3" "Wnk4" "Slc22a6" "Slc22a7" "Kcnj1" "Slc4a8" "Trpv4"
## [8] "Atp6v1b1"
# Create a new dataframe that creates new columns to have the values for RNA and Protein in the same row
combined_df_MXYvFXY_2 <- combined_df_MXYvFXY %>%
filter(Group %in% c("MXY", "FXY")) %>%
select(-Difference) %>%
pivot_wider(names_from = Type, values_from = Value)
# Create a new column if RNA is greater than 1
combined_df_MXYvFXY_2$RNA_Greater_1 <- ifelse(combined_df_MXYvFXY_2$RNA > 1, "Yes", "No")
combined_df_MXYvFXY_2$Protein_Greater_1 <- ifelse(combined_df_MXYvFXY_2$Protein > 1, "Yes", "No")
#create a column if RNA_Greater_1 and Protein_Greater_1 are the same value
combined_df_MXYvFXY_2$Same <- ifelse(combined_df_MXYvFXY_2$RNA_Greater_1 == combined_df_MXYvFXY_2$Protein_Greater_1, "Yes", "No")
# Count how many values are yes or no in the column Same
combined_df_MXYvFXY_2 %>%
group_by(Same) %>%
summarise(Count = n())# Filter the rows where Same is "No" and select the Gene column
gene_list_no_MXYvFXY <- combined_df_MXYvFXY_2 %>%
filter(Same == "No") %>%
pull(Gene)
print(gene_list_no_MXYvFXY)## [1] "Slc5a1" "Atp1a1" "Atp1b1" "Stk39" "Aqp1" "Aqp2"
## [7] "Aqp4" "Kcnmb4" "Muc1" "Slc4a4" "Slc34a1" "Slc22a7"
## [13] "Cldn2" "Lrp2" "Cubn" "Cldn7" "Cldn8" "Kcnj1"
## [19] "Slc26a4" "Slc4a8" "Atp6v1b1"
# Create a new dataframe that creates new columns to have the values for RNA and Protein in the same row
combined_df_MXXvFXX_2 <- combined_df_MXXvFXX %>%
select(-Difference) %>%
pivot_wider(names_from = Type, values_from = Value)
# Create a new column if RNA is greater than 1
combined_df_MXXvFXX_2$RNA_Greater_1 <- ifelse(combined_df_MXXvFXX_2$RNA > 1, "Yes", "No")
combined_df_MXXvFXX_2$Protein_Greater_1 <- ifelse(combined_df_MXXvFXX_2$Protein > 1, "Yes", "No")
#create a column if RNA_Greater_1 and Protein_Greater_1 are the same value
combined_df_MXXvFXX_2$Same <- ifelse(combined_df_MXXvFXX_2$RNA_Greater_1 == combined_df_MXXvFXX_2$Protein_Greater_1, "Yes", "No")
# Count how many values are yes or no in the column Same
combined_df_MXXvFXX_2 %>%
group_by(Same) %>%
summarise(Count = n())# Filter the rows where Same is "No" and select the Gene column
gene_list_no_MXXvFXX <- combined_df_MXXvFXX_2 %>%
filter(Same == "No") %>%
pull(Gene)
print(gene_list_no_MXXvFXX)## [1] "Slc12a1" "Kcnmb4" "Slc5a2" "Slc4a4" "Slc22a6" "Lrp2" "Cubn"
## [8] "Sgk1"
These tables help assess the correlation between RNA and protein expression levels. They takes into account whether RNA and protein levels are greater than 1, then counts how often these conditions match.
# subset MXY and FXX
subset_df_MXYvFXX <-subset(combined_df_MXYvFXX, Group %in% c("MXY", "FXX"))
# Calculate the absolute difference between RNA and Protein values
abs_diff_df_MXYvFXX <- subset_df_MXYvFXX %>%
select(-Difference) %>%
pivot_wider(names_from = Type, values_from = Value) %>%
mutate(`Absolute Difference` = abs(RNA - Protein))%>%
group_by(Gene) %>%
summarise(
`Absolute Difference MXY` = `Absolute Difference`[Group == "MXY"],
`Absolute Difference FXX` = `Absolute Difference`[Group == "FXX"],
`Sum Deltas` = sum(`Absolute Difference`[Group == "MXY"], na.rm = TRUE) +
sum(`Absolute Difference`[Group == "FXX"], na.rm = TRUE)) %>%
ungroup()
head(abs_diff_df_MXYvFXX)# subset MXY and FXY
subset_df_MXYvFXY <-subset(combined_df_MXYvFXY, Group %in% c("MXY", "FXY"))
# Calculate the absolute difference between RNA and Protein values
abs_diff_df_MXYvFXY <- subset_df_MXYvFXY %>%
select(-Difference) %>%
pivot_wider(names_from = Type, values_from = Value) %>%
mutate(`Absolute Difference` = abs(RNA - Protein))%>%
group_by(Gene) %>%
summarise(
`Absolute Difference MXY` = `Absolute Difference`[Group == "MXY"],
`Absolute Difference FXY` = `Absolute Difference`[Group == "FXY"],
`Sum Deltas` = sum(`Absolute Difference`[Group == "MXY"], na.rm = TRUE) +
sum(`Absolute Difference`[Group == "FXY"], na.rm = TRUE)) %>%
ungroup()
head(abs_diff_df_MXYvFXY)# Calculate the absolute difference between RNA and Protein values
abs_diff_df_MXXvFXX <- combined_df_MXXvFXX %>%
select(-Difference) %>%
pivot_wider(names_from = Type, values_from = Value) %>%
mutate(`Absolute Difference` = abs(RNA - Protein))%>%
group_by(Gene) %>%
summarise(
`Absolute Difference MXX` = `Absolute Difference`[Group == "MXX"],
`Absolute Difference FXX` = `Absolute Difference`[Group == "FXX"],
`Sum Deltas` = sum(`Absolute Difference`[Group == "MXX"], na.rm = TRUE) +
sum(`Absolute Difference`[Group == "FXX"], na.rm = TRUE)) %>%
ungroup()
head(abs_diff_df_MXXvFXX)# label metric of comparison
abs_diff_df_MXYvFXX$Metric <- "MXY vs FXX"
abs_diff_df_MXYvFXY$Metric <- "MXY vs FXY"
abs_diff_df_MXXvFXX$Metric <- "MXX vs FXX"
# join dfs
abs_diff_hm_df <- bind_rows(
mutate(abs_diff_df_MXYvFXX, Metric = "MXY vs FXX"),
mutate(abs_diff_df_MXYvFXY, Metric = "MXY vs FXY"),
mutate(abs_diff_df_MXXvFXX, Metric = "MXX vs FXX")
)
# order metrics
abs_diff_hm_df$Metric <- factor(abs_diff_hm_df$Metric, levels = c("MXY vs FXX", "MXY vs FXY", "MXX vs FXX"))
# Create the heat map with facets
ggplot(abs_diff_hm_df, aes(x = Metric, y = Gene, fill = `Sum Deltas`)) +
geom_tile() +
scale_fill_viridis(option = "magma") +
facet_wrap(~ Metric, scales = "free_x", ncol = 4) + # Line up metrics horizontally
theme_minimal() +
theme(
axis.text.x = element_blank(), # Remove x-axis text
axis.ticks.x = element_blank(), # Remove x-axis ticks
strip.text = element_text(size = 12, angle = 0), # Adjust facet label size and angle if needed
panel.spacing = unit(1, "lines"), # Space between facets
panel.grid = element_blank()
) +
labs(
title = "Heat Map of the Sum of the Differences",
x = "", # Remove x-axis label as it is redundant
y = "Gene",
fill = "Sum of Differences"
)# label metric of comparison
##abs_diff_df_MXYvFXX$Metric <- "MXY vs FXX"
##abs_diff_df_MXYvFXY$Metric <- "MXY vs FXY"
##abs_diff_df_MXXvFXX$Metric <- "MXX vs FXX"
# Common title for the plots
common_title <- "Heat Map of Sum of Differences"
# Create the MXYvFXX heat map
hm_MXYvFXX<- ggplot(abs_diff_df_MXYvFXX, aes(x = Metric, y = Gene, fill = `Sum Deltas`)) +
geom_tile() +
scale_fill_viridis(option = "magma") +
facet_wrap(~ Metric, scales = "free_x", ncol = 4) + # Line up metrics horizontally
theme_minimal() +
theme(
axis.text.x = element_blank(), # Remove x-axis text
axis.ticks.x = element_blank(), # Remove x-axis ticks
strip.text = element_text(size = 12, angle = 0), # Adjust facet label size and angle if needed
panel.spacing = unit(1, "lines"), # Space between facets
panel.grid = element_blank()
) +
labs(
title = "",
x = "", # Remove x-axis label as it is redundant
y = "Gene",
fill = "Sum of Differences"
)
# Create the MXYvFXY heat map
hm_MXYvFXY<- ggplot(abs_diff_df_MXYvFXY, aes(x = Metric, y = Gene, fill = `Sum Deltas`)) +
geom_tile() +
scale_fill_viridis(option = "magma") +
facet_wrap(~ Metric, scales = "free_x", ncol = 4) + # Line up metrics horizontally
theme_minimal() +
theme(
axis.text.x = element_blank(), # Remove x-axis text
axis.ticks.x = element_blank(), # Remove x-axis ticks
strip.text = element_text(size = 12, angle = 0), # Adjust facet label size and angle if needed
panel.spacing = unit(1, "lines"), # Space between facets
panel.grid = element_blank()
) +
labs(
title = "",
x = "", # Remove x-axis label as it is redundant
y = "Gene",
fill = "Sum of Differences"
)
# Create the MXXvFXX heat map
hm_MXXvFXX<- ggplot(abs_diff_df_MXXvFXX, aes(x = Metric, y = Gene, fill = `Sum Deltas`)) +
geom_tile() +
scale_fill_viridis(option = "magma") +
facet_wrap(~ Metric, scales = "free_x", ncol = 4) + # Line up metrics horizontally
theme_minimal() +
theme(
axis.text.x = element_blank(), # Remove x-axis text
axis.ticks.x = element_blank(), # Remove x-axis ticks
strip.text = element_text(size = 12, angle = 0), # Adjust facet label size and angle if needed
panel.spacing = unit(1, "lines"), # Space between facets
panel.grid = element_blank()
) +
labs(
title = "",
x = "", # Remove x-axis label as it is redundant
y = "Gene",
fill = "Sum of Differences"
)
# Arrange the heat maps side by side
grid.arrange(hm_MXYvFXX, hm_MXYvFXY, hm_MXXvFXX, ncol = 3, top = common_title)## R version 4.4.0 (2024-04-24)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sonoma 14.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] readxl_1.4.3 viridis_0.6.5 viridisLite_0.4.2
## [4] reshape2_1.4.4 ggrepel_0.9.5 pheatmap_1.0.12
## [7] ggvenn_0.1.10 gplots_3.1.3.1 gridExtra_2.3
## [10] sjmisc_2.8.10 openxlsx_4.2.5.2 lubridate_1.9.3
## [13] forcats_1.0.0 stringr_1.5.1 purrr_1.0.2
## [16] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [19] tidyverse_2.0.0 devtools_2.4.5 usethis_2.2.3
## [22] here_1.0.1 ggpmisc_0.6.0 ggpp_0.5.8-1
## [25] BiocManager_1.30.23 ggplot2_3.5.1 knitr_1.48
## [28] patchwork_1.2.0 Seurat_5.1.0 SeuratObject_5.0.2
## [31] sp_2.1-4 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.4.0 later_1.3.2
## [4] bitops_1.0-7 cellranger_1.1.0 polyclip_1.10-6
## [7] fastDummies_1.7.3 lifecycle_1.0.4 rprojroot_2.0.4
## [10] globals_0.16.3 lattice_0.22-6 MASS_7.3-61
## [13] insight_0.20.2 magrittr_2.0.3 plotly_4.10.4
## [16] sass_0.4.9 rmarkdown_2.27 jquerylib_0.1.4
## [19] yaml_2.3.9 remotes_2.5.0 httpuv_1.6.15
## [22] sctransform_0.4.1 spam_2.10-0 zip_2.3.1
## [25] sessioninfo_1.2.2 pkgbuild_1.4.4 spatstat.sparse_3.1-0
## [28] reticulate_1.38.0 cowplot_1.1.3 pbapply_1.7-2
## [31] RColorBrewer_1.1-3 abind_1.4-5 pkgload_1.4.0
## [34] Rtsne_0.17 irlba_2.3.5.1 listenv_0.9.1
## [37] spatstat.utils_3.0-5 MatrixModels_0.5-3 goftest_1.2-3
## [40] RSpectra_0.16-1 spatstat.random_3.3-1 fitdistrplus_1.2-1
## [43] parallelly_1.37.1 leiden_0.4.3.1 codetools_0.2-20
## [46] tidyselect_1.2.1 farver_2.1.2 matrixStats_1.3.0
## [49] spatstat.explore_3.3-1 jsonlite_1.8.8 ellipsis_0.3.2
## [52] progressr_0.14.0 ggridges_0.5.6 survival_3.7-0
## [55] tools_4.4.0 ica_1.0-3 Rcpp_1.0.13
## [58] glue_1.7.0 xfun_0.46 withr_3.0.0
## [61] fastmap_1.2.0 fansi_1.0.6 SparseM_1.84-2
## [64] caTools_1.18.2 digest_0.6.36 timechange_0.3.0
## [67] R6_2.5.1 mime_0.12 colorspace_2.1-0
## [70] scattermore_1.2 gtools_3.9.5 tensor_1.5
## [73] spatstat.data_3.1-2 utf8_1.2.4 generics_0.1.3
## [76] data.table_1.15.4 httr_1.4.7 htmlwidgets_1.6.4
## [79] uwot_0.2.2 pkgconfig_2.0.3 gtable_0.3.5
## [82] lmtest_0.9-40 htmltools_0.5.8.1 profvis_0.3.8
## [85] dotCall64_1.1-1 scales_1.3.0 png_0.1-8
## [88] spatstat.univar_3.0-0 rstudioapi_0.16.0 tzdb_0.4.0
## [91] nlme_3.1-165 cachem_1.1.0 zoo_1.8-12
## [94] sjlabelled_1.2.0 KernSmooth_2.23-24 parallel_4.4.0
## [97] miniUI_0.1.1.1 pillar_1.9.0 vctrs_0.6.5
## [100] RANN_2.6.1 urlchecker_1.0.1 promises_1.3.0
## [103] xtable_1.8-4 cluster_2.1.6 evaluate_0.24.0
## [106] cli_3.6.3 compiler_4.4.0 rlang_1.1.4
## [109] future.apply_1.11.2 labeling_0.4.3 plyr_1.8.9
## [112] fs_1.6.4 stringi_1.8.4 deldir_2.0-4
## [115] munsell_0.5.1 lazyeval_0.2.2 spatstat.geom_3.3-2
## [118] quantreg_5.98 Matrix_1.7-0 RcppHNSW_0.6.0
## [121] hms_1.1.3 future_1.33.2 shiny_1.8.1.1
## [124] highr_0.11 ROCR_1.0-11 igraph_2.0.3
## [127] memoise_2.0.1 bslib_0.7.0 polynom_1.4-1